library(readr)
library(dplyr)
library(plotly)
library(tidyverse)
library(lubridate)
library(rvest)
library(stringr)
# Load the student-level data set that the tables in this script are built from.
majordata <- readr::read_csv(file = "majordata.csv")
# Research question idea: How do different majors vary in the average number of classes taken in each division? How does ...
# ---- CODE FOR TAB NUMBER 3 IN THE SHINY APP ----
# Reshape the wide major columns into a longer format with one row per
# (student, major) pair, so the rest of this tab can work with a single
# `major` / `major_division` pair of columns.
majors <- majordata %>%
  # Stack the three major columns, then the three matching division columns.
  pivot_longer(
    cols = c(major1, major2, major3),
    names_to = "major#",
    values_to = "major"
  ) %>%
  pivot_longer(
    cols = c(major1_division, major2_division, major3_division),
    names_to = "major#_division",
    values_to = "major_division"
  ) %>%
  # Drop rows whose major or division is missing.
  drop_na(major, major_division) %>%
  # The two pivots produce every major-slot x division-slot combination; keep
  # only the rows where both come from the same slot ("major1" is the first
  # six characters of "major1_division").
  filter(str_sub(`major#_division`, 1, 6) == `major#`) %>%
  # Keep the identifying columns and the per-division class counts, giving the
  # counts display-friendly names on the way through.
  select(
    StudentID,
    `major#`,
    major,
    `major#_division`,
    major_division,
    `Fine Arts` = FineArts_count,
    Humanities = Humanities_count,
    Interdisciplinary = Interdisciplinary_count,
    `Natural Sciences and Mathematics` = NatScienceandMath_count,
    `Social Sciences` = SocialSciences_count,
    NonDivisional = NonDivisional_count,
    admit_type
  )
# Long format of `majors`: one row per (student, major, division) holding the
# number of classes taken in each of the six division columns.
majors2 <- pivot_longer(
  majors,
  cols = all_of(c(
    "Fine Arts",
    "Humanities",
    "Interdisciplinary",
    "Natural Sciences and Mathematics",
    "Social Sciences",
    "NonDivisional"
  )),
  names_to = "division",
  values_to = "numOfClasses"
)
# Average number of classes taken in each division, per major.
# Despite the `count*` prefix, every output column is a mean over the rows of
# `majors` that share the same major.
Divisions_By_Major <- majors %>%
  select(
    major,
    countFA = `Fine Arts`,
    countHUM = Humanities,
    countINT = Interdisciplinary,
    countSCIMATH = `Natural Sciences and Mathematics`,
    countSOCSCI = `Social Sciences`,
    countNON = NonDivisional
  ) %>%
  group_by(major) %>%
  summarise(across(everything(), mean))
# Total number of classes each student took outside the division of a given
# major: one row per (StudentID, major). The per-major average of these totals
# is computed later from this table.
OutsideMajor <- majors2 %>%
  # Keep only the class counts for divisions other than the major's own.
  filter(major_division != division) %>%
  group_by(StudentID, major) %>%
  # Drop the grouping explicitly; by default summarise() would leave the result
  # grouped by StudentID, which silently affects any later verb applied to it.
  summarise(num = sum(numOfClasses), .groups = "drop")
# One-number summary: the average, across students, of the number of classes
# taken outside the division of their major. The original analysis reported
# this value as 24.67.
# NOTE(review): a student with several majors has one set of rows per major in
# `majors2`, so their classes may be counted once per major here -- confirm
# this is the intended definition.
# NOTE(review): rows with a missing admit_type are also dropped by the
# comparison below -- confirm that is acceptable.
OneNumberSummary <- majors2 %>%
  filter(
    admit_type != "TRN",
    major_division != division
  ) %>%
  group_by(StudentID) %>%
  # Per-student total of classes outside the major's division ...
  summarise(num = sum(numOfClasses)) %>%
  # ... averaged over all remaining students.
  summarise(Average = sum(num) / n())
# One row per major: the average number of classes taken outside the major's
# division (`avg`), the division the major falls in, and the average number of
# classes taken in each division (`count*` columns from Divisions_By_Major).
majorsViz <- OutsideMajor %>%
  group_by(major) %>%
  summarize(avg = mean(num)) %>%
  arrange(desc(avg)) %>%
  # Attach the major's division from a one-row-per-major lookup (first division
  # listed for each major in `majors`) rather than joining the whole
  # per-student table and de-duplicating afterwards. Join keys are explicit so
  # a new shared column name cannot silently change the join.
  left_join(
    majors %>%
      distinct(major, .keep_all = TRUE) %>%
      select(major, major_division),
    by = "major"
  ) %>%
  left_join(Divisions_By_Major, by = "major") %>%
  select(
    major, avg, major_division,
    countFA, countHUM, countINT, countSCIMATH, countSOCSCI, countNON
  )
# Same data as `majorsViz`, in long format: one row per (major, division
# column) with the average number of classes in `avgClasses`.
# **This is the data used for the third viz on the shiny app.**
# pivot_longer() does not need a grouping, so none is applied; this keeps
# `majorsViz2` an ungrouped tibble.
majorsViz2 <- majorsViz %>%
  pivot_longer(
    cols = c(countFA, countHUM, countINT, countSCIMATH, countSOCSCI, countNON),
    names_to = "division",
    values_to = "avgClasses"
  )

# Average of `avg` within each major division (printed, not stored).
majorsViz2 %>%
  group_by(major_division) %>%
  summarise(classesOutside = mean(avg))
# ---- CODE FOR TAB NUMBER 1 IN THE SHINY APP ----
# Average number of classes taken in each division, grouped by the number of
# majors a student has (`major_ct`). Columns are given their display names
# directly.
# NOTE(review): "Natural Science and Mathematics" and "Non Divisional" are
# spelled differently from the labels used for tab 3 -- confirm whether the
# labels should match.
majo <- majordata %>%
  group_by(major_ct) %>%
  summarise(
    `Fine Arts` = mean(FineArts_count),
    Humanities = mean(Humanities_count),
    `Natural Science and Mathematics` = mean(NatScienceandMath_count),
    `Social Sciences` = mean(SocialSciences_count),
    `Non Divisional` = mean(NonDivisional_count),
    Interdisciplinary = mean(Interdisciplinary_count)
  )
# Long version of `majo`: one row per (major_ct, division), with the average in
# the `value` column. **Data used for the first viz on the shiny app.**
majjors <- pivot_longer(
  majo,
  cols = -major_ct,
  names_to = "division",
  values_to = "value"
)
# ---- CODE FOR TAB NUMBER 2 IN THE SHINY APP ----
# One-number summary: mean and median of `prefix_count` over all students.
# Both statistics ignore missing values; previously only the mean did, so the
# median became NA as soon as a single prefix_count was missing.
# The misspelled column name "Medain Prefix" is kept as-is so that anything
# referring to the column by name keeps working.
averagePrefix <- majordata %>%
  summarize(
    "Average Amount of Prefixes of All Students" = mean(prefix_count, na.rm = TRUE),
    "Medain Prefix" = median(prefix_count, na.rm = TRUE)
  )
averagePrefix
# Keep only the students with no second major recorded (major2 is missing),
# i.e. drop everyone who has more than one major.
singleMajors <- filter(majordata, is.na(major2))

# Print a summary of the single-major students; character columns are
# converted to factors first so summary() tabulates their values.
summary(
  mutate(singleMajors, across(where(is.character), as.factor))
)
# Average `prefix_count` for each major among single-major students (missing
# values ignored), sorted from the highest average to the lowest.
avgPrefix <- singleMajors %>%
  group_by(major1) %>%
  summarize(
    avgPrefix = mean(prefix_count, na.rm = TRUE)
  ) %>%
  # `.data$` makes it explicit that the sort key is the column, not the
  # `avgPrefix` object being assigned.
  arrange(desc(.data$avgPrefix))
# Same as `avgPrefix`, plus the division each major falls in.
# **Data used for the second viz on the shiny app.**
# The division is attached from a de-duplicated (major1, major1_division)
# lookup with an explicit join key, instead of joining every student row and
# removing the duplicates afterwards.
avgPrefix2 <- avgPrefix %>%
  left_join(
    distinct(singleMajors, major1, major1_division),
    by = "major1"
  ) %>%
  select(major1, avgPrefix, major1_division)
# ------ PRACTICE VISUALIZATIONS ------

# Visualization No. 1: stacked bars per major, filled by division column and
# normalised to proportions (position = "fill").
test <- majorsViz2 %>%
  ggplot(aes(x = major, y = avgClasses, fill = division)) +
  geom_col(position = "fill") +
  theme(axis.text.x = element_text(angle = 90))

# Visualization No. 2: one point per (major, division column); point shape
# encodes the division column and colour encodes the major's own division.
viz1 <- majorsViz2 %>%
  ggplot(aes(x = major, y = avgClasses)) +
  geom_point(aes(shape = division, color = major_division)) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Distribution of Classes Based on Your Major",
    y = "Average Number of Classes",
    x = "Major"
  )

# Visualization No. 3: the same point plot as No. 2 without custom labels,
# printed directly instead of being stored.
majorsViz2 %>%
  ggplot(aes(x = major, y = avgClasses)) +
  geom_point(aes(shape = division, color = major_division)) +
  theme(axis.text.x = element_text(angle = 90))

# Interactive (plotly) versions of the two stored plots.
ggplotly(test)
ggplotly(viz1)
#' Custom ggplot2 theme used by the visualizations in the shiny app.
#'
#' Starts from `theme_minimal()` and replaces the grid, tick and text elements
#' listed below. Legend elements are deliberately left untouched because they
#' usually need manual tweaking per plot.
#'
#' @param font Font family applied to the title, subtitle, caption, axis
#'   titles and axis text. Defaults to "Georgia", the value that was
#'   previously hard-coded.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
theme_gppr <- function(font = "Georgia") {
  theme_minimal() %+replace% # replace the elements we want to change
    theme(
      # grid elements
      panel.grid.major = element_blank(), # strip major gridlines
      panel.grid.minor = element_blank(), # strip minor gridlines
      axis.ticks = element_blank(), # strip axis ticks
      # theme_minimal() already strips axis lines, so that is not repeated here

      # text elements
      plot.title = element_text( # title
        family = font, # font family
        size = 15, # font size
        face = "bold", # bold typeface
        hjust = 0, # left align
        vjust = 2 # raise slightly
      ),
      plot.subtitle = element_text( # subtitle
        family = font, # font family
        size = 11 # font size
      ),
      plot.caption = element_text( # caption
        family = font, # font family
        size = 9, # font size
        hjust = 1 # right align
      ),
      axis.title = element_text( # axis titles
        family = font, # font family
        size = 10 # font size
      ),
      axis.text = element_text( # axis text
        family = font, # font family
        size = 9, # font size
        angle = 90 # rotate the labels
      ),
      axis.text.x = element_text( # margin for the x-axis text
        margin = margin(t = 5, b = 10)
      )
      # the legend often requires manual tweaking based on plot content,
      # so it is not defined here
    )
}